# =======================================================
# Problem 1: CryptoCurrencies & R MarkDown
# =======================================================
# Load the crypto-markets history into a data frame.
all_crypto <- read.csv(file = "crypto-markets.csv", header = TRUE)
# Inspect the column types of the loaded data
str(object = all_crypto)
## 'data.frame': 748363 obs. of 13 variables:
## $ slug : Factor w/ 1586 levels "0x","1337coin",..: 151 151 151 151 151 151 151 151 151 151 ...
## $ symbol : Factor w/ 1553 levels "$$$","$PAC","1337",..: 211 211 211 211 211 211 211 211 211 211 ...
## $ name : Factor w/ 1584 levels "0x","1World",..: 149 149 149 149 149 149 149 149 149 149 ...
## $ date : Factor w/ 1793 levels "2013-04-28","2013-04-29",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ ranknow : int 1 1 1 1 1 1 1 1 1 1 ...
## $ open : num 135 134 144 139 116 ...
## $ high : num 136 147 147 140 126 ...
## $ low : num 132.1 134 134.1 107.7 92.3 ...
## $ close : num 134 145 139 117 105 ...
## $ volume : num 0 0 0 0 0 0 0 0 0 0 ...
## $ market : num 1.50e+09 1.49e+09 1.60e+09 1.54e+09 1.29e+09 ...
## $ close_ratio: num 0.544 0.781 0.384 0.288 0.388 ...
## $ spread : num 3.88 13.49 12.88 32.17 33.32 ...
# ==============================================================
# Pre-processing
# ==============================================================
# Get the cryptocurrencies that we want
# ==============================================================
# BTC, ETH, XRP, BCH, LTC
# ==============================================================
# Return the rows of `data` whose `symbol` column equals `ticker`.
# `%in%` is used instead of `==` so that rows with a missing symbol are
# dropped rather than returned as all-NA rows.
select_coin <- function(data, ticker) {
  data[data$symbol %in% ticker, ]
}

# Reduce one coin's rows to the four columns used by the analysis
# (symbol, date, open, close). `symbol` is returned as character and `date`
# as Date, whether the CSV was read with factors or with strings.
keep_price_columns <- function(coin) {
  data.frame(
    symbol = as.character(coin$symbol),
    date = as.Date(as.character(coin$date)),
    open = coin$open,
    close = coin$close,
    stringsAsFactors = FALSE
  )
}

# Keep the rows whose date lies in the inclusive window [from, to].
# Rows with a missing date are excluded.
restrict_dates <- function(coin, from = "2017-04-01", to = "2018-04-15") {
  in_window <- !is.na(coin$date) &
    coin$date >= as.Date(from) &
    coin$date <= as.Date(to)
  coin[in_window, ]
}

# ==============================================================
# BTC, ETH, XRP, BCH, LTC: all columns, full history
# ==============================================================
btc <- select_coin(all_crypto, "BTC")
eth <- select_coin(all_crypto, "ETH")
xrp <- select_coin(all_crypto, "XRP")
bch <- select_coin(all_crypto, "BCH")
ltc <- select_coin(all_crypto, "LTC")
# ===============================================================
# Columns we need: symbol, date, open, close
# (symbol converted to character, date to Date)
# ===============================================================
btc_final <- keep_price_columns(btc)
eth_final <- keep_price_columns(eth)
xrp_final <- keep_price_columns(xrp)
bch_final <- keep_price_columns(bch)
ltc_final <- keep_price_columns(ltc)
# ==============================================================
# Filtering by date range: 2017-04-01 to 2018-04-15 (inclusive).
# The `_sorted_` part of the names is historical; rows are filtered,
# not reordered.
# ==============================================================
btc_sorted_final <- restrict_dates(btc_final)
eth_sorted_final <- restrict_dates(eth_final)
xrp_sorted_final <- restrict_dates(xrp_final)
bch_sorted_final <- restrict_dates(bch_final)
ltc_sorted_final <- restrict_dates(ltc_final)
# ===============================================================
# Data Preprocessing Done!
# ===============================================================
library("ggplot2")
library("tseries")
library("forecast")
# ================================================================
# BITCOIN!
# ================================================================
# plot open price
# Bitcoin open price over the analysis window. The legend label names the
# plotted column (it previously read "Close Price" on the open series).
ggplot() +
  geom_line(
    data = btc_sorted_final,
    aes(x = date, y = open, color = "Open Price")
  )

summary(btc_sorted_final$open)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1072 2564 4389 6318 9384 19476
# plot closed price
# Bitcoin close price over the analysis window, then its five-number summary
ggplot(data = btc_sorted_final, mapping = aes(x = date, y = close)) +
  geom_line(mapping = aes(color = "Close Price"))

summary(object = btc_sorted_final$close)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1080 2573 4404 6338 9366 19497
# In December 2017 Bitcoin prices surged at a ridiculous rate.
# This affects the whole time series: the prices before that huge spike
# appear to follow a model of their own. Also, the prices before, let's say,
# 6000 are of little use now.
# Modelling window for Bitcoin: 2017-04-15 to 2018-04-15 (inclusive)
btc_latest_model <- btc_sorted_final[
  with(btc_sorted_final, date >= "2017-04-15" & date <= "2018-04-15"),
]
# Moving averages of the open and close prices
btc_latest_model$open_ma <- ma(btc_latest_model$open, order = 7) # weekly
btc_latest_model$open_ma30 <- ma(btc_latest_model$open, order = 30) # monthly
btc_latest_model$close_ma <- ma(btc_latest_model$close, order = 7) # weekly
btc_latest_model$close_ma30 <- ma(btc_latest_model$close, order = 30) # monthly
# Open price together with its weekly and monthly moving averages
ggplot(data = btc_latest_model, mapping = aes(x = date)) +
  geom_line(aes(y = open, color = "open prices")) +
  geom_line(aes(y = open_ma, color = "weekly ma")) +
  geom_line(aes(y = open_ma30, color = "monthly ma"))
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# Close price together with its weekly and monthly moving averages.
# The raw series is `close` so that it matches the close-price averages
# drawn with it (it previously plotted `open`).
ggplot() +
  geom_line(
    data = btc_latest_model,
    aes(x = date, y = close, color = "close prices")
  ) +
  geom_line(
    data = btc_latest_model,
    aes(x = date, y = close_ma, color = "weekly ma")
  ) +
  geom_line(
    data = btc_latest_model,
    aes(x = date, y = close_ma30, color = "monthly ma")
  )
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# ============================================================================
# Predict Bitcoin Open Price
# ============================================================================
# Weekly moving average of the open price as a time series, NAs dropped
btc_open_ma <- ts(data = na.omit(btc_latest_model[["open_ma"]]))
# Augmented Dickey-Fuller test for stationarity
adf.test(btc_open_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: btc_open_ma
## Dickey-Fuller = -1.4587, Lag order = 6, p-value = 0.805
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# Log-transform the smoothed open series, plot it, and test it again
btcLog <- log(x = btc_open_ma)
plot(btcLog, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

adf.test(btcLog)
##
## Augmented Dickey-Fuller Test
##
## data: btcLog
## Dickey-Fuller = -1.1303, Lag order = 6, p-value = 0.9166
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# First difference of the log series, plotted and tested for stationarity
btcLogDiff <- diff(x = btcLog)
plot(btcLogDiff, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

adf.test(btcLogDiff)
## Warning in adf.test(btcLogDiff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: btcLogDiff
## Dickey-Fuller = -6.6834, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# ==========================================================
# ACF and PACF
# ==========================================================
acf(x = btcLogDiff, main = "ACF For BTC Differenced Series")

# q = 5 (chosen by eye)
pacf(x = btcLogDiff, main = "PACF For BTC Differenced Series")

# p = 4 (chosen by eye)
# First ARIMA fit, using the guessed orders
arimaFit <- arima(x = btcLogDiff, order = c(4, 0, 5))
print(arimaFit) # check the coefficients
##
## Call:
## arima(x = btcLogDiff, order = c(4, 0, 5))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## -0.6083 0.1646 0.5395 0.2697 1.7730 1.6395 0.8007 -0.0837
## s.e. 0.1116 0.0966 0.0636 0.0657 0.1037 0.2138 0.2461 0.1887
## ma5 intercept
## -0.4041 0.0059
## s.e. 0.0884 0.0037
##
## sigma^2 estimated as 8.54e-05: log likelihood = 1100.35, aic = -2178.69
# Plot the fitted model object
plot(x = arimaFit)

# Apply the fitted model to the series and forecast 10 steps ahead
arimaFitFC <- forecast(btcLogDiff, h = 10, model = arimaFit)
# Observed series (light blue) with the fitted values overlaid (red)
plot(btcLogDiff, xlab = "", ylab = "", type = "l", lwd = 3, col = "lightblue")
lines(arimaFitFC$fitted, col = "red", lwd = 2)

plot(x = arimaFitFC)

# Residual series with its ACF and PACF
tsdisplay(x = residuals(arimaFit), lag.max = 80)

# Find the "optimal" ARIMA parameters
auto.arima(
  y = btcLogDiff,            # the dataset
  ic = "aic",                # criterion used to compare models
  seasonal = FALSE,          # non-seasonal search
  stationary = TRUE,         # restrict to stationary models
  start.p = 1, start.q = 1,  # stepwise start (non-seasonal)
  start.P = 1, start.Q = 1,  # stepwise start (seasonal)
  max.p = 5, max.q = 5,      # upper bounds for p, q
  max.P = 5, max.Q = 5,      # upper bounds for P, Q
  max.d = 2, max.D = 2,      # upper bounds for d, D
  stepwise = TRUE,           # stepwise selection (faster)
  trace = TRUE               # report every model tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -2091.709
## ARIMA(0,0,0) with non-zero mean : -1634.916
## ARIMA(1,0,0) with non-zero mean : -2090.902
## ARIMA(0,0,1) with non-zero mean : -1889.013
## ARIMA(0,0,0) with zero mean : -1612.355
## ARIMA(2,0,1) with non-zero mean : -2088.604
## ARIMA(1,0,2) with non-zero mean : -2090.058
## ARIMA(2,0,2) with non-zero mean : -2086.859
## ARIMA(1,0,1) with zero mean : -2091.609
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,0,1) with non-zero mean : -2091.342
##
## Best model: ARIMA(1,0,1) with non-zero mean
## Series: btcLogDiff
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.8335 0.1019 0.0059
## s.e. 0.0341 0.0614 0.0038
##
## sigma^2 estimated as 0.0001181: log likelihood=1049.67
## AIC=-2091.34 AICc=-2091.22 BIC=-2076.05
# Fit the "optimal" ARIMA(p,d,q) model
# ARIMA(1,0,1), the order reported as best by auto.arima()
arimaOpt <- arima(x = btcLogDiff, order = c(1, 0, 1))
print(arimaOpt)
##
## Call:
## arima(x = btcLogDiff, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## 0.8335 0.1019 0.0059
## s.e. 0.0341 0.0614 0.0038
##
## sigma^2 estimated as 0.000117: log likelihood = 1049.67, aic = -2091.34
plot(x = arimaOpt)

# Forecast with the selected ARIMA model
# In-sample: observed series (light blue) against fitted values (red)
arimaOptFC <- forecast(btcLogDiff, h = 10, model = arimaOpt)
plot(btcLogDiff, xlab = "", ylab = "", type = "l", lwd = 3, col = "lightblue")
lines(arimaOptFC$fitted, col = "red", lwd = 2)

# Out-of-sample: the 10-step forecast
plot(x = arimaOptFC)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
plot(btcLogDiff, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

# NOTE(review): the series was built with ts() and no frequency, so it
# presumably has frequency 1 and no seasonal terms can be searched; the
# trace recorded for this call matches the non-seasonal run. Confirm before
# concluding the series is not seasonal.
auto.arima(
  y = btcLogDiff,            # the dataset
  ic = "aic",                # criterion used to compare models
  seasonal = TRUE,           # allow seasonal models
  stationary = TRUE,         # restrict to stationary models
  start.p = 1, start.q = 1,  # stepwise start (non-seasonal)
  start.P = 1, start.Q = 1,  # stepwise start (seasonal)
  max.p = 5, max.q = 5,      # upper bounds for p, q
  max.P = 5, max.Q = 5,      # upper bounds for P, Q
  max.d = 2, max.D = 2,      # upper bounds for d, D
  stepwise = TRUE,           # stepwise selection (faster)
  trace = TRUE               # report every model tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -2091.709
## ARIMA(0,0,0) with non-zero mean : -1634.916
## ARIMA(1,0,0) with non-zero mean : -2090.902
## ARIMA(0,0,1) with non-zero mean : -1889.013
## ARIMA(0,0,0) with zero mean : -1612.355
## ARIMA(2,0,1) with non-zero mean : -2088.604
## ARIMA(1,0,2) with non-zero mean : -2090.058
## ARIMA(2,0,2) with non-zero mean : -2086.859
## ARIMA(1,0,1) with zero mean : -2091.609
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,0,1) with non-zero mean : -2091.342
##
## Best model: ARIMA(1,0,1) with non-zero mean
## Series: btcLogDiff
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.8335 0.1019 0.0059
## s.e. 0.0341 0.0614 0.0038
##
## sigma^2 estimated as 0.0001181: log likelihood=1049.67
## AIC=-2091.34 AICc=-2091.22 BIC=-2076.05
# Not SEASONAL! :D
# Residual diagnostics for the selected model (series, ACF, PACF)
tsdisplay(x = residuals(arimaOpt), lag.max = 80)

# ============================================================================
# Predict Bitcoin Close Price
# ============================================================================
# Weekly moving average of the close price as a time series, NAs dropped
btc_close_ma <- ts(data = na.omit(btc_latest_model[["close_ma"]]))
# Augmented Dickey-Fuller test for stationarity
adf.test(btc_close_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: btc_close_ma
## Dickey-Fuller = -1.4453, Lag order = 6, p-value = 0.8107
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# Log-transform the smoothed close series, plot it, and test it again
btcCloseLog <- log(x = btc_close_ma)
plot(btcCloseLog, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

adf.test(btcCloseLog)
##
## Augmented Dickey-Fuller Test
##
## data: btcCloseLog
## Dickey-Fuller = -1.0811, Lag order = 6, p-value = 0.9246
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# First difference of the log series, plotted and tested for stationarity
btcCloseLogDiff <- diff(x = btcCloseLog)
plot(btcCloseLogDiff, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

adf.test(btcCloseLogDiff)
## Warning in adf.test(btcCloseLogDiff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: btcCloseLogDiff
## Dickey-Fuller = -6.6921, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# ==========================================================
# ACF and PACF
# ==========================================================
acf(x = btcCloseLogDiff, main = "ACF For BTC Differenced Series")

# q = 2 (chosen by eye)
pacf(x = btcCloseLogDiff, main = "PACF For BTC Differenced Series")

# p = 4 (chosen by eye)
# First ARIMA fit, using the guessed orders
arimaFit <- arima(x = btcCloseLogDiff, order = c(4, 0, 2))
print(arimaFit) # check the coefficients
##
## Call:
## arima(x = btcCloseLogDiff, order = c(4, 0, 2))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 intercept
## 0.4776 -0.3805 0.6350 -0.1131 0.4702 0.9664 0.0059
## s.e. 0.0572 0.0570 0.0578 0.0557 0.0203 0.0230 0.0035
##
## sigma^2 estimated as 0.0001037: log likelihood = 1068.85, aic = -2121.71
# Plot the fitted model object
plot(x = arimaFit)

# Apply the fitted model to the series and forecast 10 steps ahead
arimaFitFC <- forecast(btcCloseLogDiff, h = 10, model = arimaFit)
# Observed series (light blue) with the fitted values overlaid (red)
plot(btcCloseLogDiff, xlab = "", ylab = "", type = "l", lwd = 3,
     col = "lightblue")
lines(arimaFitFC$fitted, col = "red", lwd = 2)

plot(x = arimaFitFC)

# Residual series with its ACF and PACF
tsdisplay(x = residuals(arimaFit), lag.max = 80)

# Find the "optimal" ARIMA parameters
auto.arima(
  y = btcCloseLogDiff,       # the dataset
  ic = "aic",                # criterion used to compare models
  seasonal = FALSE,          # non-seasonal search
  stationary = TRUE,         # restrict to stationary models
  start.p = 1, start.q = 1,  # stepwise start (non-seasonal)
  start.P = 1, start.Q = 1,  # stepwise start (seasonal)
  max.p = 5, max.q = 5,      # upper bounds for p, q
  max.P = 5, max.Q = 5,      # upper bounds for P, Q
  max.d = 2, max.D = 2,      # upper bounds for d, D
  stepwise = TRUE,           # stepwise selection (faster)
  trace = TRUE               # report every model tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -2085.794
## ARIMA(0,0,0) with non-zero mean : -1633.821
## ARIMA(1,0,0) with non-zero mean : -2085.464
## ARIMA(0,0,1) with non-zero mean : -1886.411
## ARIMA(0,0,0) with zero mean : -1611.426
## ARIMA(2,0,1) with non-zero mean : -2083.142
## ARIMA(1,0,2) with non-zero mean : -2084.015
## ARIMA(2,0,2) with non-zero mean : -2081.276
## ARIMA(1,0,1) with zero mean : -2085.801
## ARIMA(0,0,1) with zero mean : -1870.209
## ARIMA(2,0,1) with zero mean : -2083.185
## ARIMA(1,0,0) with zero mean : -2085.731
## ARIMA(1,0,2) with zero mean : -2083.954
## ARIMA(2,0,2) with zero mean : -2081.274
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,0,1) with zero mean : -2085.278
##
## Best model: ARIMA(1,0,1) with zero mean
## Series: btcCloseLogDiff
## ARIMA(1,0,1) with zero mean
##
## Coefficients:
## ar1 ma1
## 0.8461 0.0876
## s.e. 0.0327 0.0616
##
## sigma^2 estimated as 0.0001206: log likelihood=1045.64
## AIC=-2085.28 AICc=-2085.21 BIC=-2073.81
# Fit the "optimal" ARIMA(p,d,q) model
# ARIMA(1,0,1), the order reported as best by auto.arima().
# NOTE(review): auto.arima() reported a zero-mean model, while this fit
# estimates an intercept (see the printed coefficients); confirm which is
# intended.
arimaOpt <- arima(x = btcCloseLogDiff, order = c(1, 0, 1))
print(arimaOpt)
##
## Call:
## arima(x = btcCloseLogDiff, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## 0.8333 0.0937 0.0059
## s.e. 0.0343 0.0621 0.0038
##
## sigma^2 estimated as 0.0001191: log likelihood = 1046.72, aic = -2085.44
plot(x = arimaOpt)

# Forecast with the selected ARIMA model
# In-sample: observed series (light blue) against fitted values (red)
arimaOptFC <- forecast(btcCloseLogDiff, h = 10, model = arimaOpt)
plot(btcCloseLogDiff, xlab = "", ylab = "", type = "l", lwd = 3,
     col = "lightblue")
lines(arimaOptFC$fitted, col = "red", lwd = 2)

# Out-of-sample: the 10-step forecast
plot(x = arimaOptFC)

# Residual series with its ACF and PACF
tsdisplay(x = residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
plot(btcCloseLogDiff, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

# NOTE(review): the series was built with ts() and no frequency, so it
# presumably has frequency 1 and no seasonal terms can be searched; the
# trace recorded for this call matches the non-seasonal run. Confirm before
# concluding the series is not seasonal.
auto.arima(
  y = btcCloseLogDiff,       # the dataset
  ic = "aic",                # criterion used to compare models
  seasonal = TRUE,           # allow seasonal models
  stationary = TRUE,         # restrict to stationary models
  start.p = 1, start.q = 1,  # stepwise start (non-seasonal)
  start.P = 1, start.Q = 1,  # stepwise start (seasonal)
  max.p = 5, max.q = 5,      # upper bounds for p, q
  max.P = 5, max.Q = 5,      # upper bounds for P, Q
  max.d = 2, max.D = 2,      # upper bounds for d, D
  stepwise = TRUE,           # stepwise selection (faster)
  trace = TRUE               # report every model tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -2085.794
## ARIMA(0,0,0) with non-zero mean : -1633.821
## ARIMA(1,0,0) with non-zero mean : -2085.464
## ARIMA(0,0,1) with non-zero mean : -1886.411
## ARIMA(0,0,0) with zero mean : -1611.426
## ARIMA(2,0,1) with non-zero mean : -2083.142
## ARIMA(1,0,2) with non-zero mean : -2084.015
## ARIMA(2,0,2) with non-zero mean : -2081.276
## ARIMA(1,0,1) with zero mean : -2085.801
## ARIMA(0,0,1) with zero mean : -1870.209
## ARIMA(2,0,1) with zero mean : -2083.185
## ARIMA(1,0,0) with zero mean : -2085.731
## ARIMA(1,0,2) with zero mean : -2083.954
## ARIMA(2,0,2) with zero mean : -2081.274
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,0,1) with zero mean : -2085.278
##
## Best model: ARIMA(1,0,1) with zero mean
## Series: btcCloseLogDiff
## ARIMA(1,0,1) with zero mean
##
## Coefficients:
## ar1 ma1
## 0.8461 0.0876
## s.e. 0.0327 0.0616
##
## sigma^2 estimated as 0.0001206: log likelihood=1045.64
## AIC=-2085.28 AICc=-2085.21 BIC=-2073.81
# Not SEASONAL! :D
# Ethereum
# plot
# Ethereum open price over the analysis window, then its five-number summary
ggplot(data = eth_sorted_final, mapping = aes(x = date, y = open)) +
  geom_line(mapping = aes(color = "Open Price"))

summary(object = eth_sorted_final$open)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 41.8 231.7 308.1 432.0 683.3 1397.5
# In December 2017 crypto prices surged at a ridiculous rate.
# This affects the whole time series: the prices before that huge spike
# appear to follow a model of their own. Also, the prices before, let's say,
# 6000 are of little use now (that threshold was written for Bitcoin; Ethereum
# peaked near 1400 in this window, so only dates from 2017-11-15 are kept).
# Modelling window for Ethereum: 2017-11-15 to 2018-04-15 (inclusive)
eth_latest_model <- eth_sorted_final[
  eth_sorted_final$date >= "2017-11-15" &
    eth_sorted_final$date <= "2018-04-15",
]
# Moving averages of the open and close prices
eth_latest_model$open_ma <- ma(eth_latest_model$open, order = 7) # weekly
eth_latest_model$open_ma30 <- ma(eth_latest_model$open, order = 30) # monthly
eth_latest_model$close_ma <- ma(eth_latest_model$close, order = 7) # weekly
eth_latest_model$close_ma30 <- ma(eth_latest_model$close, order = 30) # monthly
# Open price with both moving averages. The 30-day line has its own legend
# label; it previously reused "weekly moving average", which gave both
# averages the same colour and legend entry.
ggplot() +
  geom_line(
    data = eth_latest_model,
    aes(x = date, y = open, color = "open Price")
  ) +
  geom_line(
    data = eth_latest_model,
    aes(x = date, y = open_ma, color = "weekly moving average")
  ) +
  geom_line(
    data = eth_latest_model,
    aes(x = date, y = open_ma30, color = "monthly moving average")
  )
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# ==================================================================================
# Ethereum Open Price
# ==================================================================================
# Weekly moving average of the open price as a time series, NAs dropped
eth_open_ma <- ts(data = na.omit(eth_latest_model[["open_ma"]]))
# From the decomposed data (the decomposition is not shown in this script),
# the series looks very seasonal, with a downward trend
# Augmented Dickey-Fuller test for stationarity
adf.test(eth_open_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: eth_open_ma
## Dickey-Fuller = -0.9951, Lag order = 4, p-value = 0.9356
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# Stationarize the Time Series before fitting ARIMA
# Desirable Stationarity properties are as follows:
# -- Time-independent Variance
# -- Time-independent Mean
# -- Time-independent Autocorrelation
# Log-transform the smoothed open series, plot it, and test it again
ethLog <- log(x = eth_open_ma)
plot(ethLog, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

adf.test(ethLog)
##
## Augmented Dickey-Fuller Test
##
## data: ethLog
## Dickey-Fuller = -0.66433, Lag order = 4, p-value = 0.9713
## alternative hypothesis: stationary
# First difference of the log series, plotted and tested for stationarity
ethLogDiff <- diff(x = ethLog)
plot(ethLogDiff, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

adf.test(ethLogDiff)
## Warning in adf.test(ethLogDiff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ethLogDiff
## Dickey-Fuller = -4.2742, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# ==========================================================
# ACF and PACF
# ==========================================================
# The plot titles name the Ethereum series being analysed (they previously
# said "BTC" on Ethereum data).
acf(ethLogDiff, main = "ACF For ETH Differenced Series")

# q is 5
pacf(ethLogDiff, main = "PACF For ETH Differenced Series")

# p is 4
# ARIMA based on the guessed orders
arimaFit <- arima(ethLogDiff, order = c(4, 0, 5))
arimaFit # check the coefficients
##
## Call:
## arima(x = ethLogDiff, order = c(4, 0, 5))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.2276 0.7698 0.3351 -0.3762 1.0056 0.1205 -0.5684 -0.5570
## s.e. 0.2298 0.1980 0.1830 0.1923 0.2733 0.4945 0.2411 0.2976
## ma5 intercept
## -0.5582 0.0035
## s.e. 0.1995 0.0094
##
## sigma^2 estimated as 0.000136: log likelihood = 372.19, aic = -722.37
# Plot the fitted model object
plot(x = arimaFit)

# Apply the fitted model to the series and forecast 10 steps ahead
arimaFitFC <- forecast(ethLogDiff, h = 10, model = arimaFit)
# Observed series (light blue) with the fitted values overlaid (red)
plot(ethLogDiff, xlab = "", ylab = "", type = "l", lwd = 3, col = "lightblue")
lines(arimaFitFC$fitted, col = "red", lwd = 2)

plot(x = arimaFitFC)

# Residual series with its ACF and PACF
tsdisplay(x = residuals(arimaFit), lag.max = 80)

# The residuals look okay, but the ACF and PACF each have one or two spikes outside the bounds.
# Find the "optimal" ARIMA parameters
auto.arima(
  y = ethLogDiff,            # the dataset
  ic = "aic",                # criterion used to compare models
  seasonal = FALSE,          # non-seasonal search
  stationary = TRUE,         # restrict to stationary models
  start.p = 1, start.q = 1,  # stepwise start (non-seasonal)
  start.P = 1, start.Q = 1,  # stepwise start (seasonal)
  max.p = 5, max.q = 5,      # upper bounds for p, q
  max.P = 5, max.Q = 5,      # upper bounds for P, Q
  max.d = 2, max.D = 2,      # upper bounds for d, D
  stepwise = TRUE,           # stepwise selection (faster)
  trace = TRUE               # report every model tried
)
##
## ARIMA(1,0,1) with non-zero mean : -712.6431
## ARIMA(0,0,0) with non-zero mean : -522.6317
## ARIMA(1,0,0) with non-zero mean : -713.8077
## ARIMA(0,0,1) with non-zero mean : -619.4417
## ARIMA(0,0,0) with zero mean : -522.556
## ARIMA(2,0,0) with non-zero mean : -712.8497
## ARIMA(2,0,1) with non-zero mean : -716.0577
## ARIMA(2,0,1) with zero mean : -717.7185
## ARIMA(1,0,1) with zero mean : -714.5004
## ARIMA(3,0,1) with zero mean : -717.2476
## ARIMA(2,0,0) with zero mean : -714.7007
## ARIMA(2,0,2) with zero mean : -717.979
## ARIMA(3,0,3) with zero mean : Inf
## ARIMA(2,0,2) with non-zero mean : -716.3345
## ARIMA(1,0,2) with zero mean : -713.7408
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(2,0,3) with zero mean : -719.6245
## ARIMA(3,0,4) with zero mean : -726.2259
## ARIMA(3,0,4) with non-zero mean : -724.4744
## ARIMA(2,0,4) with zero mean : -724.0828
## ARIMA(4,0,4) with zero mean : -722.476
## ARIMA(3,0,5) with zero mean : -732.9367
## ARIMA(3,0,5) with non-zero mean : -731.1599
## ARIMA(2,0,5) with zero mean : -723.5901
## ARIMA(4,0,5) with zero mean : Inf
##
## Best model: ARIMA(3,0,5) with zero mean
## Series: ethLogDiff
## ARIMA(3,0,5) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 ma4 ma5
## -0.3620 0.3591 0.5579 1.5826 1.2777 0.4287 -0.1296 -0.3796
## s.e. 0.1256 0.0979 0.0888 0.1385 0.2462 0.2507 0.2188 0.1230
##
## sigma^2 estimated as 0.0001395: log likelihood=375.47
## AIC=-732.94 AICc=-731.36 BIC=-707.55
# Fit the "optimal" ARIMA(p,d,q) model
# ARIMA(3,0,5), the order reported as best by auto.arima().
# NOTE(review): auto.arima() reported a zero-mean model, while this fit
# estimates an intercept (see the printed coefficients); confirm which is
# intended.
arimaOpt <- arima(x = ethLogDiff, order = c(3, 0, 5))
print(arimaOpt)
##
## Call:
## arima(x = ethLogDiff, order = c(3, 0, 5))
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 ma4 ma5
## -0.3660 0.3544 0.5552 1.5854 1.2832 0.4341 -0.1266 -0.3788
## s.e. 0.1261 0.0990 0.0889 0.1395 0.2479 0.2527 0.2194 0.1230
## intercept
## 0.0039
## s.e. 0.0082
##
## sigma^2 estimated as 0.0001302: log likelihood = 375.58, aic = -731.16
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset
arimaOptFC <- forecast(ethLogDiff, model = arimaOpt, h = 10)
plot(ethLogDiff, type = "l", col = "lightblue",
     lwd = 3, xlab = "", ylab = "")
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training dataset
plot(arimaOptFC)

# Residual diagnostics. `lag.max` is an argument of tsdisplay(); a misplaced
# parenthesis previously passed it to residuals(), so the 80-lag setting was
# not applied to the ACF/PACF panels.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
plot(ethLogDiff, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

# NOTE(review): the series was built with ts() and no frequency, so it
# presumably has frequency 1 and no seasonal terms can be searched; the
# trace recorded for this call matches the non-seasonal run. Confirm before
# concluding the series is not seasonal.
auto.arima(
  y = ethLogDiff,            # the dataset
  ic = "aic",                # criterion used to compare models
  seasonal = TRUE,           # allow seasonal models
  stationary = TRUE,         # restrict to stationary models
  start.p = 1, start.q = 1,  # stepwise start (non-seasonal)
  start.P = 1, start.Q = 1,  # stepwise start (seasonal)
  max.p = 5, max.q = 5,      # upper bounds for p, q
  max.P = 5, max.Q = 5,      # upper bounds for P, Q
  max.d = 2, max.D = 2,      # upper bounds for d, D
  stepwise = TRUE,           # stepwise selection (faster)
  trace = TRUE               # report every model tried
)
##
## ARIMA(1,0,1) with non-zero mean : -712.6431
## ARIMA(0,0,0) with non-zero mean : -522.6317
## ARIMA(1,0,0) with non-zero mean : -713.8077
## ARIMA(0,0,1) with non-zero mean : -619.4417
## ARIMA(0,0,0) with zero mean : -522.556
## ARIMA(2,0,0) with non-zero mean : -712.8497
## ARIMA(2,0,1) with non-zero mean : -716.0577
## ARIMA(2,0,1) with zero mean : -717.7185
## ARIMA(1,0,1) with zero mean : -714.5004
## ARIMA(3,0,1) with zero mean : -717.2476
## ARIMA(2,0,0) with zero mean : -714.7007
## ARIMA(2,0,2) with zero mean : -717.979
## ARIMA(3,0,3) with zero mean : Inf
## ARIMA(2,0,2) with non-zero mean : -716.3345
## ARIMA(1,0,2) with zero mean : -713.7408
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(2,0,3) with zero mean : -719.6245
## ARIMA(3,0,4) with zero mean : -726.2259
## ARIMA(3,0,4) with non-zero mean : -724.4744
## ARIMA(2,0,4) with zero mean : -724.0828
## ARIMA(4,0,4) with zero mean : -722.476
## ARIMA(3,0,5) with zero mean : -732.9367
## ARIMA(3,0,5) with non-zero mean : -731.1599
## ARIMA(2,0,5) with zero mean : -723.5901
## ARIMA(4,0,5) with zero mean : Inf
##
## Best model: ARIMA(3,0,5) with zero mean
## Series: ethLogDiff
## ARIMA(3,0,5) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 ma4 ma5
## -0.3620 0.3591 0.5579 1.5826 1.2777 0.4287 -0.1296 -0.3796
## s.e. 0.1256 0.0979 0.0888 0.1385 0.2462 0.2507 0.2188 0.1230
##
## sigma^2 estimated as 0.0001395: log likelihood=375.47
## AIC=-732.94 AICc=-731.36 BIC=-707.55
# It is not seasonal!
# ============================================================================
# Predict ETHEREUM Close Price
# ============================================================================
# Weekly moving average of the close price as a time series, NAs dropped
eth_close_ma <- ts(data = na.omit(eth_latest_model[["close_ma"]]))
# Augmented Dickey-Fuller test for stationarity
adf.test(eth_close_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: eth_close_ma
## Dickey-Fuller = -1.1052, Lag order = 4, p-value = 0.9183
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# Log-transform the smoothed close series, plot it, and test it again
ethCloseLog <- log(x = eth_close_ma)
plot(ethCloseLog, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

adf.test(ethCloseLog)
##
## Augmented Dickey-Fuller Test
##
## data: ethCloseLog
## Dickey-Fuller = -0.87351, Lag order = 4, p-value = 0.9527
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# First difference of the log series, plotted and tested for stationarity
ethCloseLogDiff <- diff(x = ethCloseLog)
plot(ethCloseLogDiff, xlab = "", ylab = "", type = "l", lwd = 3, col = "blue")

adf.test(ethCloseLogDiff)
## Warning in adf.test(ethCloseLogDiff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ethCloseLogDiff
## Dickey-Fuller = -4.2208, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# ==========================================================
# ACF and PACF
# ==========================================================
# The plot titles name the Ethereum series being analysed (they previously
# said "BTC" on Ethereum data).
acf(ethCloseLogDiff, main = "ACF For ETH Differenced Series")

# q is 2
pacf(ethCloseLogDiff, main = "PACF For ETH Differenced Series")

# p is 4
# ARIMA based on the guessed orders
arimaFit <- arima(ethCloseLogDiff, order = c(4, 0, 2))
arimaFit # check the coefficients
##
## Call:
## arima(x = ethCloseLogDiff, order = c(4, 0, 2))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 intercept
## 0.0576 0.1352 0.7262 -0.2832 0.9489 0.7566 0.0038
## s.e. 0.3105 0.0970 0.1293 0.1253 0.3478 0.1561 0.0081
##
## sigma^2 estimated as 0.0001577: log likelihood = 365.67, aic = -715.34
# Plot the fitted model object
plot(x = arimaFit)

# Apply the fitted model to the series and forecast 10 steps ahead
arimaFitFC <- forecast(ethCloseLogDiff, h = 10, model = arimaFit)
# Observed series (light blue) with the fitted values overlaid (red)
plot(ethCloseLogDiff, xlab = "", ylab = "", type = "l", lwd = 3,
     col = "lightblue")
lines(arimaFitFC$fitted, col = "red", lwd = 2)

plot(x = arimaFitFC)

# Residual series with its ACF and PACF
tsdisplay(x = residuals(arimaFit), lag.max = 80)

# Find the "optimal" ARIMA parameters
auto.arima(
  y = ethCloseLogDiff,       # the dataset
  ic = "aic",                # criterion used to compare models
  seasonal = FALSE,          # non-seasonal search
  stationary = TRUE,         # restrict to stationary models
  start.p = 1, start.q = 1,  # stepwise start (non-seasonal)
  start.P = 1, start.Q = 1,  # stepwise start (seasonal)
  max.p = 5, max.q = 5,      # upper bounds for p, q
  max.P = 5, max.Q = 5,      # upper bounds for P, Q
  max.d = 2, max.D = 2,      # upper bounds for d, D
  stepwise = TRUE,           # stepwise selection (faster)
  trace = TRUE               # report every model tried
)
##
## ARIMA(1,0,1) with non-zero mean : -706.5708
## ARIMA(0,0,0) with non-zero mean : -522.1272
## ARIMA(1,0,0) with non-zero mean : -708.0859
## ARIMA(0,0,1) with non-zero mean : -617.4429
## ARIMA(0,0,0) with zero mean : -522.1977
## ARIMA(2,0,0) with non-zero mean : -706.6674
## ARIMA(2,0,1) with non-zero mean : -709.0727
## ARIMA(2,0,1) with zero mean : -710.7205
## ARIMA(1,0,1) with zero mean : -708.3808
## ARIMA(3,0,1) with zero mean : -710.3137
## ARIMA(2,0,0) with zero mean : -708.4748
## ARIMA(2,0,2) with zero mean : -711.176
## ARIMA(3,0,3) with zero mean : -718.3065
## ARIMA(3,0,3) with non-zero mean : -716.514
## ARIMA(2,0,3) with zero mean : -713.25
## ARIMA(4,0,3) with zero mean : -719.4527
## ARIMA(4,0,2) with zero mean : -717.1257
## ARIMA(4,0,4) with zero mean : -716.2337
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(5,0,4) with zero mean : -733.8654
## ARIMA(5,0,4) with non-zero mean : -732.0501
## ARIMA(5,0,3) with zero mean : -718.2465
## ARIMA(5,0,5) with zero mean : -731.9022
##
## Best model: ARIMA(5,0,4) with zero mean
## Series: ethCloseLogDiff
## ARIMA(5,0,4) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 ma3
## -1.1183 -0.2692 0.6103 0.6209 0.1395 2.2771 2.6961 1.9560
## s.e. 0.1363 0.1628 0.1302 0.1638 0.1199 0.0971 0.1800 0.1758
## ma4
## 0.7806
## s.e. 0.0854
##
## sigma^2 estimated as 0.0001384: log likelihood=376.93
## AIC=-733.87 AICc=-731.92 BIC=-705.66
# Fit the "optimal" ARIMA(p,d,q) model.
# auto.arima() above reported ARIMA(5,0,4) as the best model for
# ethCloseLogDiff, so that order is used here; the previous c(1,0,1) did not
# match the selection. As in the other sections, arima() keeps its default
# intercept.
# NOTE: the recorded output that follows this call was produced by the old
# c(1,0,1) fit and must be regenerated by re-running the script.
arimaOpt <- arima(ethCloseLogDiff, order = c(5, 0, 4))
arimaOpt
##
## Call:
## arima(x = ethCloseLogDiff, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## 0.8651 0.0657 0.0040
## s.e. 0.0483 0.0949 0.0091
##
## sigma^2 estimated as 0.0001818: log likelihood = 357.29, aic = -706.57
plot(x = arimaOpt)

# Forecast with the fitted "optimal" ARIMA model
# In-sample: observed series (light blue) against fitted values (red)
arimaOptFC <- forecast(ethCloseLogDiff, h = 10, model = arimaOpt)
plot(ethCloseLogDiff, xlab = "", ylab = "", type = "l", lwd = 3,
     col = "lightblue")
lines(arimaOptFC$fitted, col = "red", lwd = 2)

# Out-of-sample: the 10-step forecast
plot(x = arimaOptFC)

# Residual series with its ACF and PACF
tsdisplay(x = residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
plot(ethCloseLogDiff, type="l", col = "blue",
lwd = 3, xlab = "", ylab = "")

auto.arima(ethCloseLogDiff, # the dataset
seasonal = TRUE, # seasonality
stationary = TRUE, # stationarity
max.p = 5, max.q = 5, # range of p,q (non-seasonal)
max.P = 5, max.Q = 5, # range of P,Q (seasonal)
max.d = 2, max.D = 2, # range of d,D (differences)
start.p = 1, start.q = 1, # start for stepwise search
start.P = 1, start.Q = 1, # start for stepwise search
ic = "aic", # criteria to compare
stepwise = TRUE, # stepwise selection (faster)
trace = TRUE) # all ARIMA models reported
##
## ARIMA(1,0,1) with non-zero mean : -706.5708
## ARIMA(0,0,0) with non-zero mean : -522.1272
## ARIMA(1,0,0) with non-zero mean : -708.0859
## ARIMA(0,0,1) with non-zero mean : -617.4429
## ARIMA(0,0,0) with zero mean : -522.1977
## ARIMA(2,0,0) with non-zero mean : -706.6674
## ARIMA(2,0,1) with non-zero mean : -709.0727
## ARIMA(2,0,1) with zero mean : -710.7205
## ARIMA(1,0,1) with zero mean : -708.3808
## ARIMA(3,0,1) with zero mean : -710.3137
## ARIMA(2,0,0) with zero mean : -708.4748
## ARIMA(2,0,2) with zero mean : -711.176
## ARIMA(3,0,3) with zero mean : -718.3065
## ARIMA(3,0,3) with non-zero mean : -716.514
## ARIMA(2,0,3) with zero mean : -713.25
## ARIMA(4,0,3) with zero mean : -719.4527
## ARIMA(4,0,2) with zero mean : -717.1257
## ARIMA(4,0,4) with zero mean : -716.2337
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(5,0,4) with zero mean : -733.8654
## ARIMA(5,0,4) with non-zero mean : -732.0501
## ARIMA(5,0,3) with zero mean : -718.2465
## ARIMA(5,0,5) with zero mean : -731.9022
##
## Best model: ARIMA(5,0,4) with zero mean
## Series: ethCloseLogDiff
## ARIMA(5,0,4) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 ma3
## -1.1183 -0.2692 0.6103 0.6209 0.1395 2.2771 2.6961 1.9560
## s.e. 0.1363 0.1628 0.1302 0.1638 0.1199 0.0971 0.1800 0.1758
## ma4
## 0.7806
## s.e. 0.0854
##
## sigma^2 estimated as 0.0001384: log likelihood=376.93
## AIC=-733.87 AICc=-731.92 BIC=-705.66
# Conclusion for the ETH close series: no seasonal model was selected.
# NOTE(review): the seasonal trace above is identical to the non-seasonal one.
# If ethCloseLogDiff was built with ts() at its default frequency of 1,
# auto.arima() had no seasonal period to work with -- confirm before
# concluding that the series is not seasonal.

# Ripple (XRP)
# Open price over the full history, followed by its five-number summary.
ggplot() +
  geom_line(
    data = xrp_sorted_final,
    aes(x = date, y = open, color = "Open Price")
  )

summary(xrp_sorted_final$open)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.02174 0.19240 0.24285 0.49641 0.72055 3.36000
# Crypto prices surged at an extraordinary rate in December 2017. That spike
# dominates the full history, and the much older prices behave like a
# separate regime that is of little use now, so only the window
# 2017-04-15 .. 2018-04-15 is modelled.
xrp_latest_model <- xrp_sorted_final[
  xrp_sorted_final$date >= "2017-04-15" &
    xrp_sorted_final$date <= "2018-04-15",
]

# Moving averages of the open and close prices. ma() leaves NA at both ends
# of each smoothed series, hence the "Removed ... rows" warnings when plotting.
xrp_latest_model$open_ma <- ma(xrp_latest_model$open, order = 7)      # weekly
xrp_latest_model$open_ma30 <- ma(xrp_latest_model$open, order = 30)   # monthly
xrp_latest_model$close_ma <- ma(xrp_latest_model$close, order = 7)    # weekly
xrp_latest_model$close_ma30 <- ma(xrp_latest_model$close, order = 30) # monthly

# Raw open price with both smoothed versions. Each layer needs its own colour
# label; the 30-day line used to reuse "weekly moving average", which merged
# the two averages into one legend entry and one colour.
ggplot() +
  geom_line(data = xrp_latest_model,
            aes(x = date, y = open, color = "open Price")) +
  geom_line(data = xrp_latest_model,
            aes(x = date, y = open_ma, color = "weekly moving average")) +
  geom_line(data = xrp_latest_model,
            aes(x = date, y = open_ma30, color = "monthly moving average"))
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# ==================================================================================
# Ripple Open Price
# ==================================================================================
# Weekly-smoothed open price as a time series, with the NA ends dropped.
xrp_open_ma <- ts(na.omit(xrp_latest_model$open_ma))
# Author's reading of the decomposed data: strongly seasonal, with a downward
# trend. (NOTE(review): no decomposition of this series appears nearby.)
# Stationarity check: Augmented Dickey-Fuller test on the raw series.
adf.test(xrp_open_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: xrp_open_ma
## Dickey-Fuller = -2.0655, Lag order = 6, p-value = 0.549
## alternative hypothesis: stationary
# Rule: stop transforming once the ADF p-value drops below 0.05. The test
# above does not get there, so take logs and test again.
xrpLog <- log(xrp_open_ma)
plot(
  xrpLog,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(xrpLog)
##
## Augmented Dickey-Fuller Test
##
## data: xrpLog
## Dickey-Fuller = -2.8887, Lag order = 6, p-value = 0.2018
## alternative hypothesis: stationary
# The log series is still not stationary: take first differences and re-test.
xrpLogDiff <- diff(xrpLog)
plot(
  xrpLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(xrpLogDiff)
## Warning in adf.test(xrpLogDiff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: xrpLogDiff
## Dickey-Fuller = -5.4103, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# ==========================================================
# ACF and PACF
# ==========================================================
# The titles name XRP: this is the Ripple series (they previously said "BTC",
# left over from the Bitcoin section).
acf(xrpLogDiff, main = "ACF For XRP Differenced Series")

# q is 1 (read off the ACF)
pacf(xrpLogDiff, main = "PACF For XRP Differenced Series")

# p is 3 (read off the PACF)
# ARIMA with the orders guessed from the plots above
arimaFit <- arima(xrpLogDiff, order = c(3, 0, 1))
arimaFit # check the coefficients
##
## Call:
## arima(x = xrpLogDiff, order = c(3, 0, 1))
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ar3 ma1 intercept
## 0.5391 0.3065 0.0048 0.3382 0.0085
## s.e. NaN NaN NaN NaN 0.0102
##
## sigma^2 estimated as 0.0004648: log likelihood = 816.52, aic = -1621.05
# Diagnostic plot of the manually specified model.
plot(arimaFit)

# Within the training data: fitted values (red) over the observed series.
arimaFitFC <- forecast(xrpLogDiff, model = arimaFit, h = 10)
plot(
  xrpLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaFitFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training data: the h = 10 forecast.
plot(arimaFitFC)

# Residual diagnostics out to lag 80.
tsdisplay(residuals(arimaFit), lag.max = 80)

# The residuals look acceptable, although one or two ACF/PACF spikes still
# stick out.
# Search for the "optimal" non-seasonal ARIMA orders.
auto.arima(
  xrpLogDiff,
  seasonal = FALSE,           # non-seasonal search
  stationary = TRUE,          # stationary models only
  max.p = 5, max.q = 5,       # non-seasonal order limits
  max.P = 5, max.Q = 5,       # seasonal order limits
  max.d = 2, max.D = 2,       # differencing limits
  start.p = 1, start.q = 1,   # stepwise starting orders (non-seasonal)
  start.P = 1, start.Q = 1,   # stepwise starting orders (seasonal)
  ic = "aic",                 # comparison criterion
  stepwise = TRUE,            # stepwise selection (faster)
  trace = TRUE                # report every model tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1625.647
## ARIMA(0,0,0) with non-zero mean : -1101.663
## ARIMA(1,0,0) with non-zero mean : -1627.609
## ARIMA(0,0,1) with non-zero mean : -1361.615
## ARIMA(0,0,0) with zero mean : -1091.572
## ARIMA(2,0,0) with non-zero mean : -1624.791
## ARIMA(2,0,1) with non-zero mean : -1622.886
## ARIMA(1,0,0) with zero mean : -1628.901
## ARIMA(2,0,0) with zero mean : -1626.066
## ARIMA(1,0,1) with zero mean : -1626.954
## ARIMA(2,0,1) with zero mean : -1624.24
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,0,0) with zero mean : -1628.314
##
## Best model: ARIMA(1,0,0) with zero mean
## Series: xrpLogDiff
## ARIMA(1,0,0) with zero mean
##
## Coefficients:
## ar1
## 0.8906
## s.e. 0.0241
##
## sigma^2 estimated as 0.0004671: log likelihood=816.16
## AIC=-1628.31 AICc=-1628.28 BIC=-1620.67
# Refit the order chosen by auto.arima() above, ARIMA(1,0,0), and print it.
arimaOpt <- arima(xrpLogDiff, order = c(1, 0, 0))
arimaOpt
##
## Call:
## arima(x = xrpLogDiff, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.8868 0.0087
## s.e. 0.0246 0.0101
##
## sigma^2 estimated as 0.0004648: log likelihood = 816.5, aic = -1627.01
# Diagnostic plot of the fitted model.
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset: fitted values (red) over the observed series.
arimaOptFC <- forecast(xrpLogDiff, model = arimaOpt, h = 10)
plot(xrpLogDiff, type = "l", col = "lightblue",
     lwd = 3, xlab = "", ylab = "")
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training dataset
plot(arimaOptFC)

# Residual diagnostics out to lag 80. lag.max is an argument of tsdisplay();
# a misplaced parenthesis used to hand it to residuals() instead, so the
# requested lag range never reached the plot.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
plot(xrpLogDiff, type = "l", col = "blue",
     lwd = 3, xlab = "", ylab = "")

auto.arima(xrpLogDiff,                # the dataset
           seasonal = TRUE,           # seasonality
           stationary = TRUE,         # stationarity
           max.p = 5, max.q = 5,      # range of p,q (non-seasonal)
           max.P = 5, max.Q = 5,      # range of P,Q (seasonal)
           max.d = 2, max.D = 2,      # range of d,D (differences)
           start.p = 1, start.q = 1,  # start for stepwise search
           start.P = 1, start.Q = 1,  # start for stepwise search
           ic = "aic",                # criteria to compare
           stepwise = TRUE,           # stepwise selection (faster)
           trace = TRUE)              # all ARIMA models reported
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1625.647
## ARIMA(0,0,0) with non-zero mean : -1101.663
## ARIMA(1,0,0) with non-zero mean : -1627.609
## ARIMA(0,0,1) with non-zero mean : -1361.615
## ARIMA(0,0,0) with zero mean : -1091.572
## ARIMA(2,0,0) with non-zero mean : -1624.791
## ARIMA(2,0,1) with non-zero mean : -1622.886
## ARIMA(1,0,0) with zero mean : -1628.901
## ARIMA(2,0,0) with zero mean : -1626.066
## ARIMA(1,0,1) with zero mean : -1626.954
## ARIMA(2,0,1) with zero mean : -1624.24
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,0,0) with zero mean : -1628.314
##
## Best model: ARIMA(1,0,0) with zero mean
## Series: xrpLogDiff
## ARIMA(1,0,0) with zero mean
##
## Coefficients:
## ar1
## 0.8906
## s.e. 0.0241
##
## sigma^2 estimated as 0.0004671: log likelihood=816.16
## AIC=-1628.31 AICc=-1628.28 BIC=-1620.67
# Conclusion for the XRP open series: no seasonal model was selected.
# NOTE(review): xrpLogDiff descends from a ts() created without a frequency,
# so auto.arima(seasonal = TRUE) presumably had no seasonal period to use --
# the trace above matches the non-seasonal one. Confirm before relying on
# this conclusion.
# ==================================================================================
# Ripple Close Price
# ==================================================================================
# Weekly-smoothed close price as a time series, with the NA ends dropped.
xrp_close_ma <- ts(na.omit(xrp_latest_model$close_ma))
# Stationarity check: Augmented Dickey-Fuller test on the raw series.
adf.test(xrp_close_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: xrp_close_ma
## Dickey-Fuller = -2.0289, Lag order = 6, p-value = 0.5645
## alternative hypothesis: stationary
# Rule: stop transforming once the ADF p-value drops below 0.05. The test
# above does not get there, so take logs and test again.
xrpCloseLog <- log(xrp_close_ma)
plot(
  xrpCloseLog,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(xrpCloseLog)
##
## Augmented Dickey-Fuller Test
##
## data: xrpCloseLog
## Dickey-Fuller = -2.8636, Lag order = 6, p-value = 0.2124
## alternative hypothesis: stationary
# Still above the 0.05 threshold after taking logs: difference once and
# test again.
xrpCloseLogDiff <- diff(xrpCloseLog)
plot(
  xrpCloseLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(xrpCloseLogDiff)
## Warning in adf.test(xrpCloseLogDiff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: xrpCloseLogDiff
## Dickey-Fuller = -5.3281, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# The ADF p-value is now below 0.05, so the differenced log series is treated
# as stationary and no further transformation is applied.
# ==========================================================
# ACF and PACF
# ==========================================================
# The titles name XRP: this is the Ripple close series (they previously said
# "BTC", left over from the Bitcoin section).
acf(xrpCloseLogDiff, main = "ACF For XRP Differenced Series")

# q is 2 (read off the ACF)
pacf(xrpCloseLogDiff, main = "PACF For XRP Differenced Series")

# p is 4 (read off the PACF)
# ARIMA with the orders guessed from the plots above
arimaFit <- arima(xrpCloseLogDiff, order = c(4, 0, 2))
arimaFit # check the coefficients
##
## Call:
## arima(x = xrpCloseLogDiff, order = c(4, 0, 2))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 intercept
## 0.4287 -0.3895 0.7035 -0.0146 0.4708 0.9837 0.0081
## s.e. 0.0555 0.0506 0.0507 0.0553 0.0124 0.0191 0.0098
##
## sigma^2 estimated as 0.0004165: log likelihood = 833.56, aic = -1651.11
# Diagnostic plot of the manually specified model.
plot(arimaFit)

# Within the training data: fitted values (red) over the observed series.
arimaFitFC <- forecast(xrpCloseLogDiff, model = arimaFit, h = 10)
plot(
  xrpCloseLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaFitFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training data: the h = 10 forecast.
plot(arimaFitFC)

# Residual diagnostics out to lag 80.
tsdisplay(residuals(arimaFit), lag.max = 80)

# Search for the "optimal" non-seasonal ARIMA orders.
auto.arima(
  xrpCloseLogDiff,
  seasonal = FALSE,           # non-seasonal search
  stationary = TRUE,          # stationary models only
  max.p = 5, max.q = 5,       # non-seasonal order limits
  max.P = 5, max.Q = 5,       # seasonal order limits
  max.d = 2, max.D = 2,       # differencing limits
  start.p = 1, start.q = 1,   # stepwise starting orders (non-seasonal)
  start.P = 1, start.Q = 1,   # stepwise starting orders (seasonal)
  ic = "aic",                 # comparison criterion
  stepwise = TRUE,            # stepwise selection (faster)
  trace = TRUE                # report every model tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1629.284
## ARIMA(0,0,0) with non-zero mean : -1101.987
## ARIMA(1,0,0) with non-zero mean : -1631.225
## ARIMA(0,0,1) with non-zero mean : -1360.635
## ARIMA(0,0,0) with zero mean : -1091.906
## ARIMA(2,0,0) with non-zero mean : -1628.296
## ARIMA(2,0,1) with non-zero mean : Inf
## ARIMA(1,0,0) with zero mean : -1632.507
## ARIMA(2,0,0) with zero mean : -1629.591
## ARIMA(1,0,1) with zero mean : -1630.585
## ARIMA(2,0,1) with zero mean : -1628.274
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,0,0) with zero mean : -1631.87
##
## Best model: ARIMA(1,0,0) with zero mean
## Series: xrpCloseLogDiff
## ARIMA(1,0,0) with zero mean
##
## Coefficients:
## ar1
## 0.8918
## s.e. 0.0240
##
## sigma^2 estimated as 0.0004622: log likelihood=817.94
## AIC=-1631.87 AICc=-1631.83 BIC=-1624.22
# Fit an ARIMA(1,0,1) to the differenced log XRP close series and print it.
# NOTE(review): the auto.arima() trace above selects ARIMA(1,0,0) with zero
# mean, so this (1,0,1) fit is not the reported "optimal" model -- confirm
# that the extra MA term is intentional.
arimaOpt <- arima(xrpCloseLogDiff, order = c(1, 0, 1))
arimaOpt
##
## Call:
## arima(x = xrpCloseLogDiff, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## 0.8912 -0.0142 0.0083
## s.e. 0.0269 0.0579 0.0103
##
## sigma^2 estimated as 0.0004599: log likelihood = 818.28, aic = -1628.57
# Diagnostic plot of the fitted model.
plot(arimaOpt)

# Within the training data: fitted values (red) over the observed series.
arimaOptFC <- forecast(xrpCloseLogDiff, model = arimaOpt, h = 10)
plot(
  xrpCloseLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training data: the h = 10 forecast.
plot(arimaOptFC)

# Residual diagnostics out to lag 80.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# Seasonal ARIMA(p,d,q)(P,D,Q)[s] search
plot(
  xrpCloseLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

# Stepwise AIC search with seasonal terms allowed; every candidate is traced.
auto.arima(
  xrpCloseLogDiff,
  seasonal = TRUE,            # allow seasonal terms
  stationary = TRUE,          # stationary models only
  max.p = 5, max.q = 5,       # non-seasonal order limits
  max.P = 5, max.Q = 5,       # seasonal order limits
  max.d = 2, max.D = 2,       # differencing limits
  start.p = 1, start.q = 1,   # stepwise starting orders (non-seasonal)
  start.P = 1, start.Q = 1,   # stepwise starting orders (seasonal)
  ic = "aic",                 # comparison criterion
  stepwise = TRUE,            # stepwise selection (faster)
  trace = TRUE                # report every model tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1629.284
## ARIMA(0,0,0) with non-zero mean : -1101.987
## ARIMA(1,0,0) with non-zero mean : -1631.225
## ARIMA(0,0,1) with non-zero mean : -1360.635
## ARIMA(0,0,0) with zero mean : -1091.906
## ARIMA(2,0,0) with non-zero mean : -1628.296
## ARIMA(2,0,1) with non-zero mean : Inf
## ARIMA(1,0,0) with zero mean : -1632.507
## ARIMA(2,0,0) with zero mean : -1629.591
## ARIMA(1,0,1) with zero mean : -1630.585
## ARIMA(2,0,1) with zero mean : -1628.274
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,0,0) with zero mean : -1631.87
##
## Best model: ARIMA(1,0,0) with zero mean
## Series: xrpCloseLogDiff
## ARIMA(1,0,0) with zero mean
##
## Coefficients:
## ar1
## 0.8918
## s.e. 0.0240
##
## sigma^2 estimated as 0.0004622: log likelihood=817.94
## AIC=-1631.87 AICc=-1631.83 BIC=-1624.22
# Conclusion for the XRP close series: no seasonal model was selected.
# NOTE(review): xrpCloseLogDiff descends from a ts() created without a
# frequency, so auto.arima(seasonal = TRUE) presumably had no seasonal period
# to use -- the trace above matches the non-seasonal one. Confirm before
# relying on this conclusion.

# Bitcoin Cash (BCH)
# Open price over the full history, followed by its five-number summary.
ggplot() +
  geom_line(
    data = bch_sorted_final,
    aes(x = date, y = open, color = "Open Price")
  )

summary(bch_sorted_final$open)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 212.2 439.5 1002.1 1106.7 1489.2 3909.0
# Crypto prices surged at an extraordinary rate in December 2017. That spike
# dominates the full history, and the older prices behave like a separate
# regime that is of little use now, so only the window
# 2017-04-15 .. 2018-04-15 is modelled.
bch_latest_model <- bch_sorted_final[
  bch_sorted_final$date >= "2017-04-15" &
    bch_sorted_final$date <= "2018-04-15",
]

# Moving averages of the open and close prices. ma() leaves NA at both ends
# of each smoothed series, hence the "Removed ... rows" warnings when plotting.
bch_latest_model$open_ma <- ma(bch_latest_model$open, order = 7)      # weekly
bch_latest_model$open_ma30 <- ma(bch_latest_model$open, order = 30)   # monthly
bch_latest_model$close_ma <- ma(bch_latest_model$close, order = 7)    # weekly
bch_latest_model$close_ma30 <- ma(bch_latest_model$close, order = 30) # monthly

# Raw open price with both smoothed versions. Each layer needs its own colour
# label; the 30-day line used to reuse "weekly moving average", which merged
# the two averages into one legend entry and one colour.
ggplot() +
  geom_line(data = bch_latest_model,
            aes(x = date, y = open, color = "open Price")) +
  geom_line(data = bch_latest_model,
            aes(x = date, y = open_ma, color = "weekly moving average")) +
  geom_line(data = bch_latest_model,
            aes(x = date, y = open_ma30, color = "monthly moving average"))
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# Weekly-smoothed BCH open price as a time series, with the NA ends dropped.
bch_open_ma <- ts(na.omit(bch_latest_model$open_ma))
# Author's reading of the decomposed data: strongly seasonal, with a downward
# trend. (NOTE(review): no decomposition of this series appears nearby.)
# Stationarity check: Augmented Dickey-Fuller test on the raw series.
adf.test(bch_open_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: bch_open_ma
## Dickey-Fuller = -1.1542, Lag order = 6, p-value = 0.9121
## alternative hypothesis: stationary
# Rule: stop transforming once the ADF p-value drops below 0.05. The test
# above does not get there, so take logs and test again.
bchLog <- log(bch_open_ma)
plot(
  bchLog,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(bchLog)
##
## Augmented Dickey-Fuller Test
##
## data: bchLog
## Dickey-Fuller = -1.3871, Lag order = 6, p-value = 0.8336
## alternative hypothesis: stationary
# The log series is still not stationary: take first differences and re-test.
bchLogDiff <- diff(bchLog)
plot(
  bchLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(bchLogDiff)
## Warning in adf.test(bchLogDiff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: bchLogDiff
## Dickey-Fuller = -5.7218, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# ==========================================================
# ACF and PACF
# ==========================================================
# The titles name BCH: this is the Bitcoin Cash series (they previously said
# "BTC", left over from the Bitcoin section).
acf(bchLogDiff, main = "ACF For BCH Differenced Series")

# q is 1 (read off the ACF)
pacf(bchLogDiff, main = "PACF For BCH Differenced Series")

# p is 4 (read off the PACF)
# ARIMA with hand-picked orders.
# NOTE(review): the orders noted above (p = 4, q = 1) do not match the
# c(3, 0, 3) fitted here -- confirm which was intended.
arimaFit <- arima(bchLogDiff, order = c(3, 0, 3))
arimaFit # check the coefficients
##
## Call:
## arima(x = bchLogDiff, order = c(3, 0, 3))
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 intercept
## -0.5907 0.5417 0.4493 1.7654 0.9148 -0.0469 0.0023
## s.e. 0.1196 0.0775 0.0689 0.1306 0.2337 0.1279 0.0080
##
## sigma^2 estimated as 0.0004321: log likelihood = 582.48, aic = -1148.96
# Diagnostic plot of the manually specified model.
plot(arimaFit)

# Within the training data: fitted values (red) over the observed series.
arimaFitFC <- forecast(bchLogDiff, model = arimaFit, h = 10)
plot(
  bchLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaFitFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training data: the h = 10 forecast.
plot(arimaFitFC)

# Residual diagnostics out to lag 80.
tsdisplay(residuals(arimaFit), lag.max = 80)

# The residuals look acceptable, although one or two ACF/PACF spikes still
# stick out.
# Search for the "optimal" non-seasonal ARIMA orders.
auto.arima(
  bchLogDiff,
  seasonal = FALSE,           # non-seasonal search
  stationary = TRUE,          # stationary models only
  max.p = 5, max.q = 5,       # non-seasonal order limits
  max.P = 5, max.Q = 5,       # seasonal order limits
  max.d = 2, max.D = 2,       # differencing limits
  start.p = 1, start.q = 1,   # stepwise starting orders (non-seasonal)
  start.P = 1, start.Q = 1,   # stepwise starting orders (seasonal)
  ic = "aic",                 # comparison criterion
  stepwise = TRUE,            # stepwise selection (faster)
  trace = TRUE                # report every model tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1122.439
## ARIMA(0,0,0) with non-zero mean : -836.2027
## ARIMA(1,0,0) with non-zero mean : -1114.798
## ARIMA(0,0,1) with non-zero mean : -1003.544
## ARIMA(0,0,0) with zero mean : -836.4944
## ARIMA(2,0,1) with non-zero mean : -1121.726
## ARIMA(1,0,2) with non-zero mean : -1120.511
## ARIMA(2,0,2) with non-zero mean : -1130.211
## ARIMA(2,0,2) with zero mean : -1131.93
## ARIMA(1,0,2) with zero mean : -1122.107
## ARIMA(3,0,2) with zero mean : -1140.436
## ARIMA(3,0,1) with zero mean : -1123.978
## ARIMA(3,0,3) with zero mean : -1144.505
## ARIMA(4,0,4) with zero mean : Inf
## ARIMA(3,0,3) with non-zero mean : -1143.466
## ARIMA(2,0,3) with zero mean : -1142.224
## ARIMA(4,0,3) with zero mean : -1131.712
## ARIMA(3,0,4) with zero mean : -1143.512
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(3,0,3) with zero mean : Inf
## ARIMA(3,0,4) with zero mean : Inf
## ARIMA(3,0,3) with non-zero mean : Inf
## ARIMA(2,0,3) with zero mean : -1136.246
##
## Best model: ARIMA(2,0,3) with zero mean
## Series: bchLogDiff
## ARIMA(2,0,3) with zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## -0.0356 0.7335 1.1905 0.0219 -0.3665
## s.e. 0.0779 0.0574 0.0850 0.1371 0.0990
##
## sigma^2 estimated as 0.0004849: log likelihood=574.12
## AIC=-1136.25 AICc=-1135.88 BIC=-1115.39
# Refit the order chosen by auto.arima() above, ARIMA(2,0,3), and print it.
arimaOpt <- arima(bchLogDiff, order = c(2, 0, 3))
arimaOpt
##
## Call:
## arima(x = bchLogDiff, order = c(2, 0, 3))
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 intercept
## -0.0368 0.7324 1.1913 0.0232 -0.3658 0.0023
## s.e. 0.0782 0.0576 0.0852 0.1374 0.0991 0.0084
##
## sigma^2 estimated as 0.0004746: log likelihood = 574.16, aic = -1134.32
# Diagnostic plot of the fitted model.
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset: fitted values (red) over the observed series.
arimaOptFC <- forecast(bchLogDiff, model = arimaOpt, h = 10)
plot(bchLogDiff, type = "l", col = "lightblue",
     lwd = 3, xlab = "", ylab = "")
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training dataset
plot(arimaOptFC)

# Residual diagnostics out to lag 80. lag.max is an argument of tsdisplay();
# a misplaced parenthesis used to hand it to residuals() instead, so the
# requested lag range never reached the plot.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
plot(bchLogDiff, type = "l", col = "blue",
     lwd = 3, xlab = "", ylab = "")

auto.arima(bchLogDiff,                # the dataset
           seasonal = TRUE,           # seasonality
           stationary = TRUE,         # stationarity
           max.p = 5, max.q = 5,      # range of p,q (non-seasonal)
           max.P = 5, max.Q = 5,      # range of P,Q (seasonal)
           max.d = 2, max.D = 2,      # range of d,D (differences)
           start.p = 1, start.q = 1,  # start for stepwise search
           start.P = 1, start.Q = 1,  # start for stepwise search
           ic = "aic",                # criteria to compare
           stepwise = TRUE,           # stepwise selection (faster)
           trace = TRUE)              # all ARIMA models reported
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1122.439
## ARIMA(0,0,0) with non-zero mean : -836.2027
## ARIMA(1,0,0) with non-zero mean : -1114.798
## ARIMA(0,0,1) with non-zero mean : -1003.544
## ARIMA(0,0,0) with zero mean : -836.4944
## ARIMA(2,0,1) with non-zero mean : -1121.726
## ARIMA(1,0,2) with non-zero mean : -1120.511
## ARIMA(2,0,2) with non-zero mean : -1130.211
## ARIMA(2,0,2) with zero mean : -1131.93
## ARIMA(1,0,2) with zero mean : -1122.107
## ARIMA(3,0,2) with zero mean : -1140.436
## ARIMA(3,0,1) with zero mean : -1123.978
## ARIMA(3,0,3) with zero mean : -1144.505
## ARIMA(4,0,4) with zero mean : Inf
## ARIMA(3,0,3) with non-zero mean : -1143.466
## ARIMA(2,0,3) with zero mean : -1142.224
## ARIMA(4,0,3) with zero mean : -1131.712
## ARIMA(3,0,4) with zero mean : -1143.512
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(3,0,3) with zero mean : Inf
## ARIMA(3,0,4) with zero mean : Inf
## ARIMA(3,0,3) with non-zero mean : Inf
## ARIMA(2,0,3) with zero mean : -1136.246
##
## Best model: ARIMA(2,0,3) with zero mean
## Series: bchLogDiff
## ARIMA(2,0,3) with zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## -0.0356 0.7335 1.1905 0.0219 -0.3665
## s.e. 0.0779 0.0574 0.0850 0.1371 0.0990
##
## sigma^2 estimated as 0.0004849: log likelihood=574.12
## AIC=-1136.25 AICc=-1135.88 BIC=-1115.39
# ==================================================================================
# Bitcoin Cash Close Price
# ==================================================================================
# Weekly-smoothed BCH close price as a time series, with the NA ends dropped.
bch_close_ma <- ts(na.omit(bch_latest_model$close_ma))
# Stationarity check: Augmented Dickey-Fuller test on the raw series.
adf.test(bch_close_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: bch_close_ma
## Dickey-Fuller = -1.1749, Lag order = 6, p-value = 0.9087
## alternative hypothesis: stationary
# Rule: stop transforming once the ADF p-value drops below 0.05. The test
# above does not get there, so take logs and test again.
bchCloseLog <- log(bch_close_ma)
plot(
  bchCloseLog,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(bchCloseLog)
##
## Augmented Dickey-Fuller Test
##
## data: bchCloseLog
## Dickey-Fuller = -1.2939, Lag order = 6, p-value = 0.8728
## alternative hypothesis: stationary
# Still above the 0.05 threshold after taking logs: difference once and
# test again.
bchCloseLogDiff <- diff(bchCloseLog)
plot(
  bchCloseLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(bchCloseLogDiff)
## Warning in adf.test(bchCloseLogDiff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: bchCloseLogDiff
## Dickey-Fuller = -5.8254, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# The ADF p-value is now below 0.05, so the differenced log series is treated
# as stationary and no further transformation is applied.
# ==========================================================
# ACF and PACF
# ==========================================================
# The titles name BCH: this is the Bitcoin Cash close series (they previously
# said "BTC", left over from the Bitcoin section).
acf(bchCloseLogDiff, main = "ACF For BCH Differenced Series")

# q is 2 (read off the ACF)
pacf(bchCloseLogDiff, main = "PACF For BCH Differenced Series")

# p is 4 (read off the PACF)
# ARIMA with the orders guessed from the plots above
arimaFit <- arima(bchCloseLogDiff, order = c(4, 0, 2))
arimaFit # check the coefficients
##
## Call:
## arima(x = bchCloseLogDiff, order = c(4, 0, 2))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 intercept
## 1.6189 -1.7356 1.0112 -0.1158 -0.6483 0.8978 0.0039
## s.e. 0.0870 0.1283 0.1220 0.0738 0.0547 0.0549 0.0079
##
## sigma^2 estimated as 0.0004863: log likelihood = 571.16, aic = -1126.32
# Diagnostic plot of the manually specified model.
plot(arimaFit)

# Within the training data: fitted values (red) over the observed series.
arimaFitFC <- forecast(bchCloseLogDiff, model = arimaFit, h = 10)
plot(
  bchCloseLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaFitFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training data: the h = 10 forecast.
plot(arimaFitFC)

# Residual diagnostics out to lag 80.
tsdisplay(residuals(arimaFit), lag.max = 80)

# Search for the "optimal" non-seasonal ARIMA orders.
auto.arima(
  bchCloseLogDiff,
  seasonal = FALSE,           # non-seasonal search
  stationary = TRUE,          # stationary models only
  max.p = 5, max.q = 5,       # non-seasonal order limits
  max.P = 5, max.Q = 5,       # seasonal order limits
  max.d = 2, max.D = 2,       # differencing limits
  start.p = 1, start.q = 1,   # stepwise starting orders (non-seasonal)
  start.P = 1, start.Q = 1,   # stepwise starting orders (seasonal)
  ic = "aic",                 # comparison criterion
  stepwise = TRUE,            # stepwise selection (faster)
  trace = TRUE                # report every model tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1116.936
## ARIMA(0,0,0) with non-zero mean : -839.0271
## ARIMA(1,0,0) with non-zero mean : -1109.056
## ARIMA(0,0,1) with non-zero mean : -1013.714
## ARIMA(0,0,0) with zero mean : -838.9558
## ARIMA(2,0,1) with non-zero mean : -1118.002
## ARIMA(2,0,0) with non-zero mean : -1116.745
## ARIMA(2,0,2) with non-zero mean : -1116.002
## ARIMA(3,0,2) with non-zero mean : -1144.888
## ARIMA(3,0,2) with zero mean : -1146.858
## ARIMA(2,0,2) with zero mean : -1117.547
## ARIMA(4,0,2) with zero mean : -1148.74
## ARIMA(4,0,1) with zero mean : -1118.735
## ARIMA(4,0,3) with zero mean : -1142.473
## ARIMA(3,0,1) with zero mean : -1119.614
## ARIMA(5,0,3) with zero mean : -1131.375
## ARIMA(4,0,2) with non-zero mean : -1146.933
## ARIMA(5,0,2) with zero mean : -1133.322
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(4,0,2) with zero mean : Inf
## ARIMA(4,0,2) with non-zero mean : Inf
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(3,0,2) with non-zero mean : Inf
## ARIMA(4,0,3) with zero mean : Inf
## ARIMA(5,0,2) with zero mean : Inf
## ARIMA(5,0,3) with zero mean : Inf
## ARIMA(3,0,1) with zero mean : -1115.554
##
## Best model: ARIMA(3,0,1) with zero mean
## Series: bchCloseLogDiff
## ARIMA(3,0,1) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1
## 1.0186 -0.3171 0.1178 -0.0218
## s.e. 0.5005 0.4922 0.1110 0.5028
##
## sigma^2 estimated as 0.0005337: log likelihood=562.78
## AIC=-1115.55 AICc=-1115.3 BIC=-1098.17
# Fit an ARIMA(1,0,1) to the differenced log BCH close series and print it.
# NOTE(review): the auto.arima() trace above selects ARIMA(3,0,1) with zero
# mean, so this (1,0,1) fit is not the reported "optimal" model -- confirm
# that the simpler order is intentional.
arimaOpt <- arima(bchCloseLogDiff, order = c(1, 0, 1))
arimaOpt
##
## Call:
## arima(x = bchCloseLogDiff, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## 0.7377 0.2705 0.0037
## s.e. 0.0522 0.0780 0.0071
##
## sigma^2 estimated as 0.0005265: log likelihood = 562.4, aic = -1116.8
# Diagnostic plot of the fitted model.
plot(arimaOpt)

# Within the training data: fitted values (red) over the observed series.
arimaOptFC <- forecast(bchCloseLogDiff, model = arimaOpt, h = 10)
plot(
  bchCloseLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training data: the h = 10 forecast.
plot(arimaOptFC)

# Residual diagnostics out to lag 80.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# Seasonal ARIMA(p,d,q)(P,D,Q)[s] search
plot(
  bchCloseLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

# Stepwise AIC search with seasonal terms allowed; every candidate is traced.
auto.arima(
  bchCloseLogDiff,
  seasonal = TRUE,            # allow seasonal terms
  stationary = TRUE,          # stationary models only
  max.p = 5, max.q = 5,       # non-seasonal order limits
  max.P = 5, max.Q = 5,       # seasonal order limits
  max.d = 2, max.D = 2,       # differencing limits
  start.p = 1, start.q = 1,   # stepwise starting orders (non-seasonal)
  start.P = 1, start.Q = 1,   # stepwise starting orders (seasonal)
  ic = "aic",                 # comparison criterion
  stepwise = TRUE,            # stepwise selection (faster)
  trace = TRUE                # report every model tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1116.936
## ARIMA(0,0,0) with non-zero mean : -839.0271
## ARIMA(1,0,0) with non-zero mean : -1109.056
## ARIMA(0,0,1) with non-zero mean : -1013.714
## ARIMA(0,0,0) with zero mean : -838.9558
## ARIMA(2,0,1) with non-zero mean : -1118.002
## ARIMA(2,0,0) with non-zero mean : -1116.745
## ARIMA(2,0,2) with non-zero mean : -1116.002
## ARIMA(3,0,2) with non-zero mean : -1144.888
## ARIMA(3,0,2) with zero mean : -1146.858
## ARIMA(2,0,2) with zero mean : -1117.547
## ARIMA(4,0,2) with zero mean : -1148.74
## ARIMA(4,0,1) with zero mean : -1118.735
## ARIMA(4,0,3) with zero mean : -1142.473
## ARIMA(3,0,1) with zero mean : -1119.614
## ARIMA(5,0,3) with zero mean : -1131.375
## ARIMA(4,0,2) with non-zero mean : -1146.933
## ARIMA(5,0,2) with zero mean : -1133.322
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(4,0,2) with zero mean : Inf
## ARIMA(4,0,2) with non-zero mean : Inf
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(3,0,2) with non-zero mean : Inf
## ARIMA(4,0,3) with zero mean : Inf
## ARIMA(5,0,2) with zero mean : Inf
## ARIMA(5,0,3) with zero mean : Inf
## ARIMA(3,0,1) with zero mean : -1115.554
##
## Best model: ARIMA(3,0,1) with zero mean
## Series: bchCloseLogDiff
## ARIMA(3,0,1) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1
## 1.0186 -0.3171 0.1178 -0.0218
## s.e. 0.5005 0.4922 0.1110 0.5028
##
## sigma^2 estimated as 0.0005337: log likelihood=562.78
## AIC=-1115.55 AICc=-1115.3 BIC=-1098.17
# Conclusion for the BCH close series: no seasonal model was selected.
# NOTE(review): bchCloseLogDiff descends from a ts() created without a
# frequency, so auto.arima(seasonal = TRUE) presumably had no seasonal period
# to use -- the trace above matches the non-seasonal one. Confirm before
# relying on this conclusion.

# LiteCoin (LTC)
# Open price over the full history, followed by its five-number summary.
ggplot() +
  geom_line(
    data = ltc_sorted_final,
    aes(x = date, y = open, color = "Open Price")
  )

summary(ltc_sorted_final$open)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.65 41.59 55.03 95.06 160.79 359.13
# Crypto prices surged at an extraordinary rate in December 2017. That spike
# dominates the full history, and the much older prices behave like a
# separate regime that is of little use now, so only the window
# 2017-04-15 .. 2018-04-15 is modelled.
ltc_latest_model <- ltc_sorted_final[
  ltc_sorted_final$date >= "2017-04-15" &
    ltc_sorted_final$date <= "2018-04-15",
]

# Moving averages of the open and close prices. ma() leaves NA at both ends
# of each smoothed series, hence the "Removed ... rows" warnings when plotting.
ltc_latest_model$open_ma <- ma(ltc_latest_model$open, order = 7)      # weekly
ltc_latest_model$open_ma30 <- ma(ltc_latest_model$open, order = 30)   # monthly
ltc_latest_model$close_ma <- ma(ltc_latest_model$close, order = 7)    # weekly
ltc_latest_model$close_ma30 <- ma(ltc_latest_model$close, order = 30) # monthly

# Raw open price with both smoothed versions. Each layer needs its own colour
# label; the 30-day line used to reuse "weekly moving average", which merged
# the two averages into one legend entry and one colour.
ggplot() +
  geom_line(data = ltc_latest_model,
            aes(x = date, y = open, color = "open Price")) +
  geom_line(data = ltc_latest_model,
            aes(x = date, y = open_ma, color = "weekly moving average")) +
  geom_line(data = ltc_latest_model,
            aes(x = date, y = open_ma30, color = "monthly moving average"))
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# Weekly-smoothed open price as a ts object, with the NA entries removed.
ltc_open_ma <- ts(na.omit(ltc_latest_model[["open_ma"]]))
# From the decomposed data, we can see that it is very seasonal, and the trend is going down
# Stationarity
# Augmented Dickey-Fuller test on the smoothed open price.
adf.test(x = ltc_open_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: ltc_open_ma
## Dickey-Fuller = -2.3929, Lag order = 6, p-value = 0.4109
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# Log-transform the smoothed open price, plot it, and re-run the ADF test.
ltcLog <- log(ltc_open_ma)
plot(
  ltcLog,
  type = "l", lwd = 3, col = "blue",
  xlab = "", ylab = ""
)

adf.test(x = ltcLog)
##
## Augmented Dickey-Fuller Test
##
## data: ltcLog
## Dickey-Fuller = -2.3157, Lag order = 6, p-value = 0.4435
## alternative hypothesis: stationary
# First difference of the log series, plotted and tested for stationarity.
ltcLogDiff <- diff(ltcLog)
plot(
  ltcLogDiff,
  type = "l", lwd = 3, col = "blue",
  xlab = "", ylab = ""
)

adf.test(x = ltcLogDiff)
## Warning in adf.test(ltcLogDiff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ltcLogDiff
## Dickey-Fuller = -7.8222, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# ==========================================================
# ACF and PACF
# ==========================================================
# Correlograms of the differenced log open price, used to pick the MA order (q)
# from the ACF and the AR order (p) from the PACF. The titles name LTC, the
# series actually plotted (they previously said BTC).
acf(ltcLogDiff, main = "ACF For LTC Differenced Series")

# q is 2
pacf(ltcLogDiff, main = "PACF For LTC Differenced Series")

# p is 3
# Arima Based on guessing
# Hand-picked orders from the correlograms: p = 3, d = 0, q = 2.
arimaFit <- arima(x = ltcLogDiff, order = c(3, 0, 2))
arimaFit # inspect the coefficient estimates
##
## Call:
## arima(x = ltcLogDiff, order = c(3, 0, 2))
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 intercept
## 0.7476 0.8167 -0.6655 0.1604 -0.6527 0.0080
## s.e. 0.1409 0.0974 0.1078 0.1566 0.1422 0.0046
##
## sigma^2 estimated as 0.0002809: log likelihood = 901.69, aic = -1789.37
# Default plot method for the hand-picked model.
plot(arimaFit)

# Apply the fitted model to the series and forecast 10 steps ahead.
arimaFitFC <- forecast(ltcLogDiff, model = arimaFit, h = 10)
# Observed series (light blue) with the in-sample fitted values on top (red).
plot(
  ltcLogDiff,
  type = "l", lwd = 3, col = "lightblue",
  xlab = "", ylab = ""
)
points(arimaFitFC[["fitted"]], type = "l", lwd = 2, col = "red")

# Forecast beyond the end of the series.
plot(arimaFitFC)

# Residual series with its ACF and PACF out to lag 80.
tsdisplay(residuals(arimaFit), lag.max = 80)

# Residuals are okay, but the ACF and PACF each have one or two spikes outside the bounds!
# Find the "optimal" ARIMA parameters
# Stepwise, AIC-driven order search on the differenced log open price,
# with the seasonal part switched off. trace = TRUE reports every candidate.
auto.arima(
  ltcLogDiff,
  seasonal = FALSE,           # no seasonal terms
  stationary = TRUE,          # stationarity
  start.p = 1, start.q = 1,   # stepwise starting point (non-seasonal)
  start.P = 1, start.Q = 1,   # stepwise starting point (seasonal)
  max.p = 5, max.q = 5,       # upper bounds for p, q
  max.P = 5, max.Q = 5,       # upper bounds for P, Q
  max.d = 2, max.D = 2,       # upper bounds for d, D
  ic = "aic",                 # comparison criterion
  stepwise = TRUE,            # stepwise selection (faster)
  trace = TRUE                # report all ARIMA models tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1791.877
## ARIMA(0,0,0) with non-zero mean : -1334.011
## ARIMA(1,0,0) with non-zero mean : -1789.44
## ARIMA(0,0,1) with non-zero mean : -1583.731
## ARIMA(0,0,0) with zero mean : -1316.386
## ARIMA(2,0,1) with non-zero mean : -1794.232
## ARIMA(2,0,0) with non-zero mean : -1792.172
## ARIMA(2,0,2) with non-zero mean : -1793.571
## ARIMA(3,0,2) with non-zero mean : -1790.972
## ARIMA(2,0,1) with zero mean : -1793.99
## ARIMA(3,0,1) with non-zero mean : -1792.74
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(2,0,1) with non-zero mean : -1791.481
##
## Best model: ARIMA(2,0,1) with non-zero mean
## Series: ltcLogDiff
## ARIMA(2,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 mean
## 1.6832 -0.7314 -0.7600 0.0080
## s.e. 0.1264 0.1051 0.1423 0.0045
##
## sigma^2 estimated as 0.0002859: log likelihood=900.74
## AIC=-1791.48 AICc=-1791.3 BIC=-1772.37
# Fit the "optimal" ARIMA(p,d,q) model
# Refit with the orders chosen by auto.arima: p = 2, d = 0, q = 1.
arimaOpt <- arima(x = ltcLogDiff, order = c(2, 0, 1))
arimaOpt
##
## Call:
## arima(x = ltcLogDiff, order = c(2, 0, 1))
##
## Coefficients:
## ar1 ar2 ma1 intercept
## 1.6832 -0.7314 -0.7600 0.0080
## s.e. 0.1264 0.1051 0.1423 0.0045
##
## sigma^2 estimated as 0.0002825: log likelihood = 900.74, aic = -1791.48
# Default plot method for the auto.arima-selected model.
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset
arimaOptFC <- forecast(ltcLogDiff, model = arimaOpt, h = 10)
plot(ltcLogDiff, type = "l", col = "lightblue",
     lwd = 3, xlab = "", ylab = "")
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training dataset
plot(arimaOptFC)

# Residual series with its ACF and PACF out to lag 80. lag.max belongs to
# tsdisplay(); a misplaced parenthesis previously passed it to residuals(),
# so the requested lag range was never applied to the display.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
# Plot the Litecoin differenced log open series that the seasonal search below
# is run on (this previously plotted bchLogDiff, the Bitcoin Cash series).
plot(ltcLogDiff, type = "l", col = "blue",
     lwd = 3, xlab = "", ylab = "")

# Same stepwise AIC search as before, this time with seasonal terms allowed.
# NOTE(review): the recorded trace matches the non-seasonal run exactly;
# presumably the series carries no seasonal period -- confirm.
auto.arima(
  ltcLogDiff,
  seasonal = TRUE,            # allow seasonal terms
  stationary = TRUE,          # stationarity
  start.p = 1, start.q = 1,   # stepwise starting point (non-seasonal)
  start.P = 1, start.Q = 1,   # stepwise starting point (seasonal)
  max.p = 5, max.q = 5,       # upper bounds for p, q
  max.P = 5, max.Q = 5,       # upper bounds for P, Q
  max.d = 2, max.D = 2,       # upper bounds for d, D
  ic = "aic",                 # comparison criterion
  stepwise = TRUE,            # stepwise selection (faster)
  trace = TRUE                # report all ARIMA models tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1791.877
## ARIMA(0,0,0) with non-zero mean : -1334.011
## ARIMA(1,0,0) with non-zero mean : -1789.44
## ARIMA(0,0,1) with non-zero mean : -1583.731
## ARIMA(0,0,0) with zero mean : -1316.386
## ARIMA(2,0,1) with non-zero mean : -1794.232
## ARIMA(2,0,0) with non-zero mean : -1792.172
## ARIMA(2,0,2) with non-zero mean : -1793.571
## ARIMA(3,0,2) with non-zero mean : -1790.972
## ARIMA(2,0,1) with zero mean : -1793.99
## ARIMA(3,0,1) with non-zero mean : -1792.74
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(2,0,1) with non-zero mean : -1791.481
##
## Best model: ARIMA(2,0,1) with non-zero mean
## Series: ltcLogDiff
## ARIMA(2,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 mean
## 1.6832 -0.7314 -0.7600 0.0080
## s.e. 0.1264 0.1051 0.1423 0.0045
##
## sigma^2 estimated as 0.0002859: log likelihood=900.74
## AIC=-1791.48 AICc=-1791.3 BIC=-1772.37
# It is not seasonal!
# ==================================================================================
# Litecoin Close Price
# ==================================================================================
# Weekly-smoothed close price as a ts object, with the NA entries removed.
ltc_close_ma <- ts(na.omit(ltc_latest_model[["close_ma"]]))
# Stationarity check (Augmented Dickey-Fuller)
adf.test(x = ltc_close_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: ltc_close_ma
## Dickey-Fuller = -2.4328, Lag order = 6, p-value = 0.3941
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# Log-transform the smoothed close price, plot it, and re-run the ADF test.
ltcCloseLog <- log(ltc_close_ma)
plot(
  ltcCloseLog,
  type = "l", lwd = 3, col = "blue",
  xlab = "", ylab = ""
)

adf.test(x = ltcCloseLog)
##
## Augmented Dickey-Fuller Test
##
## data: ltcCloseLog
## Dickey-Fuller = -2.3782, Lag order = 6, p-value = 0.4171
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# First difference of the log close series, plotted and tested for stationarity.
ltcCloseLogDiff <- diff(ltcCloseLog)
plot(
  ltcCloseLogDiff,
  type = "l", lwd = 3, col = "blue",
  xlab = "", ylab = ""
)

adf.test(x = ltcCloseLogDiff)
## Warning in adf.test(ltcCloseLogDiff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ltcCloseLogDiff
## Dickey-Fuller = -7.8168, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# ==========================================================
# ACF and PACF
# ==========================================================
# Correlograms of the differenced log close price, used to pick the MA order
# (q) from the ACF and the AR order (p) from the PACF. The titles name the LTC
# close series actually plotted (they previously said BTC).
acf(ltcCloseLogDiff, main = "ACF For LTC Close Differenced Series")

# q is 2
pacf(ltcCloseLogDiff, main = "PACF For LTC Close Differenced Series")

# p is 4
# Arima Based on guessing
# Hand-picked orders from the correlograms: p = 4, d = 0, q = 2.
arimaFit <- arima(x = ltcCloseLogDiff, order = c(4, 0, 2))
arimaFit # inspect the coefficient estimates
##
## Call:
## arima(x = ltcCloseLogDiff, order = c(4, 0, 2))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 intercept
## 2.1010 -2.1249 1.1093 -0.2070 -1.2323 0.9996 0.0081
## s.e. 0.0532 0.1103 0.1103 0.0531 0.0246 0.0387 0.0054
##
## sigma^2 estimated as 0.0002514: log likelihood = 917.16, aic = -1818.33
# Default plot method for the hand-picked model.
plot(arimaFit)

# Apply the fitted model to the close series and forecast 10 steps ahead.
arimaFitFC <- forecast(ltcCloseLogDiff, model = arimaFit, h = 10)
# Observed series (light blue) with the in-sample fitted values on top (red).
plot(
  ltcCloseLogDiff,
  type = "l", lwd = 3, col = "lightblue",
  xlab = "", ylab = ""
)
points(arimaFitFC[["fitted"]], type = "l", lwd = 2, col = "red")

# Forecast beyond the end of the series.
plot(arimaFitFC)

# Residual series with its ACF and PACF out to lag 80.
tsdisplay(residuals(arimaFit), lag.max = 80)

# Find the "optimal" ARIMA parameters
# Stepwise, AIC-driven order search on the differenced log close price,
# with the seasonal part switched off. trace = TRUE reports every candidate.
auto.arima(
  ltcCloseLogDiff,
  seasonal = FALSE,           # no seasonal terms
  stationary = TRUE,          # stationarity
  start.p = 1, start.q = 1,   # stepwise starting point (non-seasonal)
  start.P = 1, start.Q = 1,   # stepwise starting point (seasonal)
  max.p = 5, max.q = 5,       # upper bounds for p, q
  max.P = 5, max.Q = 5,       # upper bounds for P, Q
  max.d = 2, max.D = 2,       # upper bounds for d, D
  ic = "aic",                 # comparison criterion
  stepwise = TRUE,            # stepwise selection (faster)
  trace = TRUE                # report all ARIMA models tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1787.8
## ARIMA(0,0,0) with non-zero mean : -1333.854
## ARIMA(1,0,0) with non-zero mean : -1786.529
## ARIMA(0,0,1) with non-zero mean : -1580.798
## ARIMA(0,0,0) with zero mean : -1316.402
## ARIMA(2,0,1) with non-zero mean : -1789.651
## ARIMA(2,0,0) with non-zero mean : -1787.509
## ARIMA(2,0,2) with non-zero mean : -1788.878
## ARIMA(3,0,2) with non-zero mean : -1785.584
## ARIMA(2,0,1) with zero mean : -1789.305
## ARIMA(3,0,1) with non-zero mean : -1786.759
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(2,0,1) with non-zero mean : -1787.459
##
## Best model: ARIMA(2,0,1) with non-zero mean
## Series: ltcCloseLogDiff
## ARIMA(2,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 mean
## 1.6828 -0.7299 -0.7645 0.0082
## s.e. 0.1271 0.1057 0.1421 0.0046
##
## sigma^2 estimated as 0.0002893: log likelihood=898.73
## AIC=-1787.46 AICc=-1787.28 BIC=-1768.34
# Fit the "optimal" ARIMA(p,d,q) model
# Fits an ARIMA(1,0,1) model to the differenced log close price and prints it.
# NOTE(review): the auto.arima search above reports ARIMA(2,0,1) as the best
# model, and the open-price section refits with c(2,0,1); the order used here
# is c(1,0,1). Confirm whether this is intentional before relying on the
# "optimal" forecasts that follow.
arimaOpt <- arima(ltcCloseLogDiff, order = c(1,0,1))
arimaOpt
##
## Call:
## arima(x = ltcCloseLogDiff, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## 0.8282 0.1164 0.0084
## s.e. 0.0350 0.0634 0.0059
##
## sigma^2 estimated as 0.000288: log likelihood = 897.53, aic = -1787.06
# Default plot method for the refitted model.
plot(arimaOpt)

# In-sample view: apply the model to the close series, forecasting 10 steps,
# then draw the fitted values (red) over the observed series (light blue).
arimaOptFC <- forecast(ltcCloseLogDiff, model = arimaOpt, h = 10)
plot(
  ltcCloseLogDiff,
  type = "l", lwd = 3, col = "lightblue",
  xlab = "", ylab = ""
)
points(arimaOptFC[["fitted"]], type = "l", lwd = 2, col = "red")

# Out-of-sample view: the forecast past the end of the series.
plot(arimaOptFC)

# Residual series with its ACF and PACF out to lag 80.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
# Re-plot the differenced log close series before the seasonal search.
plot(
  ltcCloseLogDiff,
  type = "l", lwd = 3, col = "blue",
  xlab = "", ylab = ""
)

# Same stepwise AIC search as the non-seasonal run, with seasonal terms allowed.
auto.arima(
  ltcCloseLogDiff,
  seasonal = TRUE,            # allow seasonal terms
  stationary = TRUE,          # stationarity
  start.p = 1, start.q = 1,   # stepwise starting point (non-seasonal)
  start.P = 1, start.Q = 1,   # stepwise starting point (seasonal)
  max.p = 5, max.q = 5,       # upper bounds for p, q
  max.P = 5, max.Q = 5,       # upper bounds for P, Q
  max.d = 2, max.D = 2,       # upper bounds for d, D
  ic = "aic",                 # comparison criterion
  stepwise = TRUE,            # stepwise selection (faster)
  trace = TRUE                # report all ARIMA models tried
)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(1,0,1) with non-zero mean : -1787.8
## ARIMA(0,0,0) with non-zero mean : -1333.854
## ARIMA(1,0,0) with non-zero mean : -1786.529
## ARIMA(0,0,1) with non-zero mean : -1580.798
## ARIMA(0,0,0) with zero mean : -1316.402
## ARIMA(2,0,1) with non-zero mean : -1789.651
## ARIMA(2,0,0) with non-zero mean : -1787.509
## ARIMA(2,0,2) with non-zero mean : -1788.878
## ARIMA(3,0,2) with non-zero mean : -1785.584
## ARIMA(2,0,1) with zero mean : -1789.305
## ARIMA(3,0,1) with non-zero mean : -1786.759
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(2,0,1) with non-zero mean : -1787.459
##
## Best model: ARIMA(2,0,1) with non-zero mean
## Series: ltcCloseLogDiff
## ARIMA(2,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 mean
## 1.6828 -0.7299 -0.7645 0.0082
## s.e. 0.1271 0.1057 0.1421 0.0046
##
## sigma^2 estimated as 0.0002893: log likelihood=898.73
## AIC=-1787.46 AICc=-1787.28 BIC=-1768.34